Setup

Load R libraries

library(data.table)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(limma)
library(biomaRt)
library(fgsea)
library(goseq)

theme_set(theme_classic())

cell_type_name = params$cell_type_name
graph_weight = params$graph_weight

cell_type_name
## [1] "Micro-PVM"
graph_weight
## [1] "0.5"

Check enrichment of gene sets

Read in gene info and gene set assignments

file_tag = sprintf("%s_%s", cell_type_name, graph_weight)

assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag), 
                     what = character(), sep="\n")

gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag), 
                 what = character(), sep="\n")

gene_sets = sapply(gene_sets, strsplit, split=",")
n_genes   = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.00   22.00   26.00   24.57   28.25   30.00
length(n_genes)
## [1] 40
sort(n_genes)
##  [1]  7 14 15 17 19 21 21 21 21 22 22 23 23 23 24 25 25 26 26 26 26 26 26 26 27
## [26] 27 27 28 28 28 29 29 29 29 29 29 29 30 30 30

Find gene symbols

Find gene symbols from bioMart.

All the gene symbols that can be found in bioMart are consistent with what we have. So no need to run it.

ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"), 
                filters = "external_gene_name", 
                values = assayed_genes, 
                mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]

table(assayed_genes %in% gene_BM$external_gene_name)

t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]

table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]

Find gene symbols using the alias2Symbol function from limma.

a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  a2s[i] = ai[1]
}

table(is.na(a2s))
## 
## FALSE  TRUE 
##  1739   261
table(a2s == assayed_genes, useNA = 'ifany')
## 
## FALSE  TRUE  <NA> 
##    21  1718   261
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)

gene_info[sym_in_data != sym_limma,]
##     sym_in_data   sym_limma
##  1:       LRRC6     DNAAF11
##  2:    C11orf49      CSTPP1
##  3:   LINC00476 ERCC6L2-AS1
##  4:  HNRNPA1P48   HNRNPA1L3
##  5:  ZRANB2-AS2   ZRANB2-DT
##  6:   LINC00271     AHI1-DT
##  7:    RFX3-AS1     RFX3-DT
##  8:   LINC00884  ATP13A3-DT
##  9:   FBXO30-DT    EPM2A-DT
## 10:   LINC00894    EOLA2-DT
## 11:   COX10-AS1    COX10-DT
## 12:   LINC01184  SLC12A2-DT
## 13: FAM198B-AS1  GASK1B-AS1
## 14:     C5orf17   LINC02899
## 15: C8orf37-AS1 CFAP418-AS1
## 16:   LINC01146       HISLA
## 17:      BTBD11       ABTB3
## 18:       H2BU1      H2BC26
## 19:      SKIV2L       SKIC2
## 20:     FAM155A       NALF1
## 21:  CTB-41I6.2   PIK3R5-DT
##     sym_in_data   sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma & (gene_symbol != "MT-CO2")), 
                gene_symbol := sym_limma]

dim(gene_info)
## [1] 2000    3
gene_info[1:5,]
##    sym_in_data sym_limma gene_symbol
## 1:        CFTR      CFTR        CFTR
## 2:        ICA1      ICA1        ICA1
## 3:        PDK4      PDK4        PDK4
## 4:       CALCR     CALCR       CALCR
## 5:       ABCB4     ABCB4       ABCB4
t1 = table(gene_info$gene_symbol)
table(t1)
## t1
##    1 
## 2000

Prepare gene set information

Gene set annotations (by gene symbols) were downloaded from MSigDB website.

gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]]    = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"

pathways = list()
for(k1 in names(gmtfile)){
  pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}

names(pathways)
## [1] "reactome" "go_bp"
sapply(pathways, length)
## reactome    go_bp 
##     1692     7647

Filter gene sets for size between 10 and 500.

lapply(pathways, function(v){
  quantile(sapply(v, length), probs = seq(0, 1, 0.1), na.rm = TRUE)
})
## $reactome
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    7.0    9.0   12.0   17.0   23.0   31.0   44.0   71.8  120.9 1463.0 
## 
## $go_bp
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    6.0    8.0   10.0   14.0   19.0   29.0   46.0   80.8  183.0 1966.0
for(k1 in names(pathways)){
  p1 = pathways[[k1]]
  pathways[[k1]] = p1[sapply(p1, length) %in% 10:500]
}

Conduct enrichment analysis

dim(gene_info)
## [1] 2000    3
gene_info[1:2,]
##    sym_in_data sym_limma gene_symbol
## 1:        CFTR      CFTR        CFTR
## 2:        ICA1      ICA1        ICA1
gene_dat = fread(sprintf("data/%s_genes_info.csv", cell_type_name))
dim(gene_dat)
## [1] 36517     9
gene_dat[1:2,]
##           gene_ids feature_is_filtered feature_name feature_reference
## 1: ENSG00000000003               FALSE       TSPAN6    NCBITaxon:9606
## 2: ENSG00000000005               FALSE         TNMD    NCBITaxon:9606
##    feature_biotype n_cells_by_counts mean_counts pct_dropout_by_counts
## 1:            gene                76    0.002350               99.8100
## 2:            gene                 1    0.000025               99.9975
##    total_counts
## 1:           94
## 2:            1
length(unique(gene_info$sym_in_data))
## [1] 2000
table(gene_info$sym_in_data %in% gene_dat$feature_name)
## 
## TRUE 
## 2000
table(gene_dat$feature_name %in% gene_info$sym_in_data)
## 
## FALSE  TRUE 
## 34517  2000
gene_dat$selected = 0
gene_dat$selected[match(gene_info$sym_in_data, gene_dat$feature_name)] = 1

table(gene_dat$selected)
## 
##     0     1 
## 34517  2000
tapply(gene_dat$pct_dropout_by_counts, gene_dat$selected, summary)
## $`0`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   94.48   99.58   93.53   99.97  100.00 
## 
## $`1`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6475 65.5944 84.3650 76.3053 93.6756 97.9950
gene_info = merge(gene_info, gene_dat, by.x="sym_in_data", 
                  by.y="feature_name", all.x = TRUE, all.y = FALSE)
dim(gene_info)
## [1] 2000   12
gene_info[1:2,]
##    sym_in_data sym_limma gene_symbol        gene_ids feature_is_filtered
## 1:        AATK      AATK        AATK ENSG00000181409               FALSE
## 2:        ABAT      ABAT        ABAT ENSG00000183044               FALSE
##    feature_reference feature_biotype n_cells_by_counts mean_counts
## 1:    NCBITaxon:9606            gene              3157    0.091525
## 2:    NCBITaxon:9606            gene              9074    0.368950
##    pct_dropout_by_counts total_counts selected
## 1:               92.1075         3661        1
## 2:               77.3150        14758        1
max_n2kp = 10

goseq_res = NULL

for(k in 1:length(gene_sets)){
  if(length(gene_sets[[k]]) < 10) { next }
  
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])
  
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  table(genes)
  
  pwf = nullp(genes, "hg38", "geneSymbol", 
              bias.data = 100 - gene_info$pct_dropout_by_counts)
  
  for(k1 in names(pathways)){
    p1 = pathways[[k1]]
    res1 = goseq(pwf, "hg38", "geneSymbol", 
                 gene2cat=goseq:::reversemapping(p1))
    res1$FDR  = p.adjust(res1$over_represented_pvalue, method="BH")
    
    nD = sum(res1$FDR < 0.05)
    
    if(nD > 0){
      res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=81)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
##  [1] "MCTP2"         "ADAMTS3"       "GHRL"          "HESX1"        
##  [5] "UCN"           "WDR25"         "EYS"           "PAPSS2"       
##  [9] "CARNMT1-AS1"   "MTCP1"         "LINC01277"     "LINC00271"    
## [13] "LINC02884"     "RPL34-DT"      "RP11-195C7.1"  "KCNMA1-AS1"   
## [17] "RP11-544A12.8" "STARD4-AS1"    "C5orf17"       "LINC02196"    
## [21] "MIR100HG"      "RP1-71H24.1"   "LINC02444"     "RP11-58G13.1" 
## [25] "RP11-354K4.2"  "TRG-AS1"       "RP11-237N2.1"  "RP11-314E10.1"

## [1] 2
##  [1] "MTERF1"        "PIGC"          "ADAMTS19"      "ADGRG5"       
##  [5] "BEX3"          "FLJ40194"      "TMEM151B"      "PCED1B"       
##  [9] "H2BC4"         "H2BC18"        "COL5A2"        "ENTPD3-AS1"   
## [13] "TEX41"         "ZNF451-AS1"    "RP4-568F9.6"   "RP11-557H15.4"
## [17] "LINC02642"     "BDNF-AS"       "LINC02057"     "AC008697.1"   
## [21] "CTD-2537O9.1"  "RP11-370I10.6" "H2AC19"        "H2BC8"        
## [25] "H2AC8"         "RP11-495L19.1" "RP11-321P16.3" "RP11-399K19.1"

## [1] 3
##  [1] "SIGLEC1"       "ADGRD1"        "F13A1"         "CH25H"        
##  [5] "TMEM44"        "BANK1"         "ADAMTS15"      "ODF3B"        
##  [9] "RP11-445F6.2"  "LINC01515"     "AC002539.1"    "RP3-525N10.2" 
## [13] "AC007682.1"    "RP11-142A5.1"  "LINC02211"     "FAM66D"       
## [17] "OVCH1-AS1"     "CTD-2336H13.2" "LINC02073"     "CERNA2"       
## [21] "LINC02885"     "RP11-38B6.1"

## [1] 4
##  [1] "APBB1IP"   "ACER3"     "PLEK"      "TBC1D14"   "NPL"       "ARMH4"    
##  [7] "GAREM1"    "RASGEF1C"  "AOPEP"     "DISP1"     "DEPTOR"    "ATF7IP2"  
## [13] "MARCHF3"   "DLEU1"     "SLC8A1"    "SCFD2"     "PRKN"      "MBD5"     
## [19] "HMGA1P4"   "XACT"      "LINC01094" "RNASET2"   "ARHGAP6"   "PADI2"    
## [25] "FMN1"

## [1] 5
##  [1] "CELSR3"        "GALNT16"       "LILRB5"        "HS3ST2"       
##  [5] "NRG2"          "JAML"          "PLPP3"         "RANBP3L"      
##  [9] "FSTL5"         "STXBP6"        "HPSE2"         "LRRC3B"       
## [13] "ADGRB1"        "VMO1"          "POTEG"         "ZNF521"       
## [17] "FAM153CP"      "FAM229A"       "RP11-47I22.3"  "AF064858.8"   
## [21] "TRHDE-AS1"     "AC002066.1"    "RP11-696N14.1" "CPEB2-DT"     
## [25] "SELENOP"       "RP11-79E3.2"   "RP11-661A12.5" "RP11-429A20.3"
## [29] "RP11-396B14.2" "RP11-436D23.1"

## [1] 6
##  [1] "MMP24"         "C4orf19"       "CYTL1"         "ENO4"         
##  [5] "FAM177B"       "CLEC9A"        "INSYN2B"       "ANKUB1"       
##  [9] "ELMO1-AS1"     "TMLHE-AS1"     "ENTPD1-AS1"    "AC096558.1"   
## [13] "SRGAP2-AS1"    "AC037445.1"    "RP11-528G1.2"  "DIAPH2-AS1"   
## [17] "LINC01141"     "MEF2C-AS2"     "LINC02742"     "RP11-649G15.2"
## [21] "RP11-452H21.1" "CTD-2647E9.3"  "RP11-815J21.4" "ABHD15-AS1"   
## [25] "RP11-16C1.3"   "RP11-793A3.2"  "CTD-3149D2.4"  "CTD-2282P23.2"
## [29] "DISC2"

## [1] 7
##  [1] "NAV3"        "LY96"        "FGGY"        "ZDHHC14"     "KCNQ3"      
##  [6] "MIR99AHG"    "MEF2C-AS1"   "LINC02232"   "ATP6V0D1-DT" "KCNQ2"      
## [11] "CUX2"        "FRMD4B"      "WDR45B"      "SLC2A5"      "HSPA12A"    
## [16] "RBFOX3"      "MYO1D"       "KCNQ5"       "ZNF846"      "ZNF407"     
## [21] "LNCAROD"

## [1] 8
##  [1] "TENT5A"        "CYREN"         "TMEM241"       "RALGPS1"      
##  [5] "SLC49A4"       "GASK1A"        "PUS10"         "GASK1B"       
##  [9] "ACYP2"         "RNLS"          "LIPE-AS1"      "ALOX12-AS1"   
## [13] "HDAC2-AS2"     "COX10-AS1"     "MIR4300HG"     "RP11-10H3.1"  
## [17] "C8orf37-AS1"   "RP11-554D14.6" "LINC02328"     "RP11-187O7.3" 
## [21] "MIR222HG"      "RP11-323F24.5" "RP1-111C20.3"  "RP11-574F11.4"
## [25] "ATOH8"         "KLF13"

## [1] 9
##  [1] "IFT43"   "ATP5F1E" "STAB1"   "LCP2"    "HDHD5"   "SESN1"   "DOCK3"  
##  [8] "BICRAL"  "GTDC1"   "AFDN"    "PRAM1"   "CCDC40"  "NR6A1"   "RSU1"   
## [15] "SLC2A13" "SH3RF1"  "ST3GAL2" "NEK10"   "PRKG1"   "AIDA"    "GLDN"   
## [22] "PARVB"   "IARS1"

## [1] 10
##  [1] "SNCAIP"    "PITPNM2"   "PROCR"     "IL1A"      "C3"        "FGF13"    
##  [7] "STON2"     "RNF157"    "KCNMB1"    "KCNMA1"    "TSPAN7"    "TAGAP"    
## [13] "RIC3"      "P2RY12"    "XIST"      "LINC01091" "MMP28"     "RGCC"     
## [19] "SH3BP5"    "LPL"       "KCNE1"

## [1] 12
##  [1] "OPHN1"        "CASS4"        "RIPOR2"       "PDE10A"       "PLCL1"       
##  [6] "AHI1"         "WDPCP"        "UNC80"        "IRAK1BP1"     "TENM4"       
## [11] "CACNA1D"      "SMIM14"       "ZNF608"       "CAPS2"        "FIGN"        
## [16] "RTN4RL1"      "COL25A1"      "PRDX6-AS1"    "AC011288.2"   "LINC00894"   
## [21] "GMDS-DT"      "RP11-673E1.1" "RP11-160E2.6" "RP4-681L3.3"  "HIVEP2"      
## [26] "MCTP1"

## [1] 13
##  [1] "SLC1A3"   "RAPGEF1"  "PTPRE"    "SSH2"     "FER"      "MGAT5"   
##  [7] "FMNL2"    "CDC42SE2" "RUNX1"    "CLASP2"   "OXR1"     "LDLRAD4" 
## [13] "GPHN"     "BNC2"     "CHD2"     "PACS1"    "PCDH9"    "PDE4B"   
## [19] "NEAT1"

## [1] 14
##  [1] "NIPAL2"        "CPED1"         "GPAT3"         "SESN3"        
##  [5] "ARHGAP42"      "CYSLTR1"       "RAB39A"        "CATSPERE"     
##  [9] "CDIN1"         "SDHAF3"        "MAMDC2-AS1"    "FIRRE"        
## [13] "CTA-292E10.6"  "RP11-37B2.1"   "RP11-630C16.2" "RP11-946L16.1"
## [17] "MIR223HG"      "SNX25"         "OGFRL1"        "SEM1"         
## [21] "RASGEF1B"      "C17orf67"      "CLEC5A"

## [1] 15
##  [1] "SLC5A9"        "RAB38"         "CFAP74"        "TCERG1L"      
##  [5] "SPDYE2"        "RP11-165F24.3" "RP11-526K17.2" "HCG22"        
##  [9] "LINC00960"     "RP11-712B9.2"  "RP11-381K20.2" "RP11-39E3.3"  
## [13] "RP11-434H14.1" "RP11-493L12.3" "RP11-317G6.1"  "PRKCA-AS1"    
## [17] "H2BC7"         "RP11-622O11.6" "COL5A3"        "SLC26A7"      
## [21] "SNED1"         "HTR1E"         "DIRC3"         "DARS1-AS1"    
## [25] "CYP1B1-AS1"    "AC092835.2"    "MIR155HG"      "RP11-384C4.7" 
## [29] "LINC01138"

## [1] 16
##  [1] "SYNDIG1"       "FAM149A"       "SPATA6"        "C1QC"         
##  [5] "MIR31HG"       "LINC01357"     "FTCDNL1"       "RP11-162D16.2"
##  [9] "RP11-120D5.1"  "LINC01684"     "CEACAM16-AS1"  "LINC00907"    
## [13] "KCNQ1OT1"      "RP11-74J13.8"  "EIF1B-AS1"     "RP11-437L7.2" 
## [17] "RP11-685G11.1" "APBB3"         "DARS1"         "COL5A1"       
## [21] "MFSD4B"        "H2AC6"         "ZC3H6"         "CTD-2643I7.5" 
## [25] "RP11-707P17.1" "RP11-452F19.4"

## [1] 17
##  [1] "ZNF532"                    "ARHGEF18"                 
##  [3] "PLAGL1"                    "FLVCR2"                   
##  [5] "PUDP"                      "PCBD2"                    
##  [7] "ARHGAP20"                  "SAMSN1"                   
##  [9] "DENND2B"                   "CDYL2"                    
## [11] "SPTLC3"                    "MICOS10"                  
## [13] "PGBD5"                     "B3GLCT"                   
## [15] "TMSB4X"                    "LINC00630"                
## [17] "RP11-535M15.1"             "SLC16A1-AS1"              
## [19] "LMCD1-AS1"                 "PVT1"                     
## [21] "LIX1-AS1"                  "LINC01605_ENSG00000253161"
## [23] "RP11-588H23.3"             "LINC01146"                
## [25] "RP11-281A20.2"             "RP11-692P14.1"            
## [27] "RP11-111G13.1"

## [1] 18
##  [1] "PRKCQ"    "PKD2L2"   "ANGPT2"   "MAP2K6"   "NEK11"    "PTPRB"   
##  [7] "FMO5"     "PDE5A"    "EFCAB11"  "MED12L"   "MTHFD2L"  "TMEM52B" 
## [13] "SLC35G2"  "KCNMB2"   "LTC4S"    "KIF9-AS1" "CNTN1"    "HGF"     
## [19] "RASGRP2"  "EMILIN2"  "CRIM1"    "CAMK4"    "KCND3"    "C11orf80"
## [25] "PPM1E"    "NELL2"

## [1] 19
##  [1] "LRRC7"        "ARHGAP10"     "RAPGEF4"      "FGF14"        "ADAM23"      
##  [6] "ATP8A2"       "PDZD2"        "STXBP5L"      "PAM"          "TENM2"       
## [11] "ADGRL3"       "NCAM2"        "CSMD3"        "NLGN1"        "CTNND2"      
## [16] "FUT9"         "CADM2"        "RIMS2"        "NRXN1"        "FLRT2"       
## [21] "NRG3"         "TAFA2"        "LINC00632"    "SNHG14"       "RP11-384F7.2"
## [26] "IQCJ-SCHIP1"

## [1] 20
##  [1] "SGCE"           "C11orf65"       "RP11-321C24.1"  "ARHGAP26-AS1"  
##  [5] "ALG13-AS1"      "RP11-353M9.1"   "MYCBP2-AS1"     "CCDC200"       
##  [9] "AC018890.6"     "ZBTB20-AS5"     "CTD-2015H6.3"   "RP11-268P4.5"  
## [13] "CTD-3239E11.2"  "RP11-238K6.1"   "RP11-513G19.1"  "RP11-571M6.7"  
## [17] "RP11-136F16.1"  "RP11-713N11.4"  "USP3-AS1"       "RP11-16B9.1"   
## [21] "RP11-106M3.3"   "RNF213-AS1"     "RP11-737O24.2"  "RP11-53B2.3"   
## [25] "RP4-769N13.7"   "RP11-793H13.14" "RP11-458A7.1"   "RP11-1146N6.3" 
## [29] "RP11-685G9.5"   "CTD-2021K4.2"

## [1] 21
##  [1] "PER3"      "CYFIP2"    "DHRS9"     "RPS6KA5"   "STMN2"     "ABCG2"    
##  [7] "SLC22A23"  "PTPN14"    "CCDC171"   "PLEKHA7"   "GAP43"     "SVIL"     
## [13] "LINC01278" "FOSL2"     "SLC16A10"  "CYTIP"     "HIP1"      "PARP8"    
## [19] "SRSF12"    "AGAP1"     "FAM110B"   "ZFPM2"

## [1] 22
##  [1] "ANGPTL1"       "ITGA9"         "AGBL3"         "LINC00539"    
##  [5] "AC017101.10"   "RP1-225E12.2"  "COA6-AS1"      "AC083884.8"   
##  [9] "RP11-666F17.1" "DENND6A-AS1"   "RP11-692D12.1" "RP11-679C8.2" 
## [13] "RP11-703M24.5" "RP11-7F3.1"    "AC009166.9"    "ZFY"          
## [17] "PXDNL"         "RNF150"        "LINC00910"     "H1-0"         
## [21] "KLF3-AS1"      "LINC02646"     "RP11-351A11.1" "HIF1A-AS3"    
## [25] "RP11-322M13.1" "CTD-2285E13.1" "RP11-404C6.6"

## [1] 23
##  [1] "NEURL2"                  "LILRB2"                 
##  [3] "PEBP4"                   "IGSF11"                 
##  [5] "ADGRA3"                  "LINC00467"              
##  [7] "SLC9B1"                  "H2AC20"                 
##  [9] "FSIP2"                   "LINC01285"              
## [11] "ATP6AP1L"                "AC003090.1"             
## [13] "RMDN2-AS1"               "LINC01376"              
## [15] "MEIKIN"                  "RP11-246A10.1"          
## [17] "ALG1L9P_ENSG00000248671" "AC006160.5"             
## [19] "RP11-1H15.2"             "RP11-511B23.2"          
## [21] "RP11-638I2.8"            "LINC00639"              
## [23] "RP11-221G19.1"           "RP1-18C9.3"             
## [25] "RP11-21M24.6"            "RP11-65F13.4"           
## [27] "RP11-56I23.2"            "AMER2"                  
## [29] "LINC00970"

## [1] 24
##  [1] "LINC00996"               "SLC26A3"                
##  [3] "HELLS"                   "CHI3L1"                 
##  [5] "LYPD5"                   "TNFRSF11B"              
##  [7] "LINC02397"               "ARL17B"                 
##  [9] "MYO16-AS1"               "RP3-467L1.4"            
## [11] "RP11-657O9.1"            "RP11-13N12.1"           
## [13] "MTRNR2L1"                "RP11-81K2.1"            
## [15] "SCARNA2_ENSG00000270066" "LINC00551"              
## [17] "CH507-513H4.1"

## [1] 25
##  [1] "PCSK1N"         "IER3"           "ARL5C"          "AZIN2"         
##  [5] "NLGN4Y"         "MIR4435-2HG"    "GLIPR1L1"       "TMEM72-AS1"    
##  [9] "RP1-30E17.2"    "SLC8A1-AS1"     "MIR646HG"       "KCNMB2-AS1"    
## [13] "ARHGAP8"        "STX18-AS1"      "ANK2-AS1"       "RP11-281P23.2" 
## [17] "ZFPM2-AS1"      "MAILR"          "AP000487.6"     "RP11-516C1.1"  
## [21] "LINC00621"      "RP11-399K21.11" "LINC02712"      "RP11-305E17.8" 
## [25] "RP11-19D22.1"   "RP11-264E23.4"  "RP11-555K12.4"  "PCDH11Y"

## [1] 26
##  [1] "PEX7"          "RAB40B"        "ARAP2"         "ABCA7"        
##  [5] "FAM107B"       "SLC9A7"        "RNMT"          "IL4I1"        
##  [9] "DCUN1D4"       "ZC4H2"         "BTG1"          "ADCY3"        
## [13] "FAM117B"       "COLEC12"       "CCDC107"       "FRMD5"        
## [17] "RBM44"         "ARL4C"         "SYCP2"         "H2BU1"        
## [21] "CR1L"          "CD247"         "DENND1B"       "AS3MT"        
## [25] "USP2-AS1"      "RP11-368L12.1"

## [1] 27
##  [1] "P3H2"       "AKAP7"      "ACVR2A"     "PLAU"       "SMAD6"     
##  [6] "CCDC102B"   "PRMT9"      "SLC9A9"     "KCNIP1"     "DYNC2H1"   
## [11] "HLA-DRA"    "CYTH1"      "AGAP3"      "CSGALNACT1" "EVA1C"

## [1] 28
##  [1] "RPL3"       "RPL18A"     "RPL19"      "RPL34"      "RPL24"     
##  [6] "LDAH"       "RPS24"      "RPS2"       "RPS11"      "RPL11"     
## [11] "RPS27A"     "RPL32"      "RAB28"      "RPL26"      "RPL38"     
## [16] "RPLP2"      "RPS23"      "RPL12"      "RPL23A"     "ZNF433-AS1"
## [21] "ADARB1"

## [1] 29
##  [1] "MAP3K20"       "ARRDC2"        "CDH26"         "KIAA0513"     
##  [5] "CCNG2"         "PGGHG"         "TIAM2"         "AMN1"         
##  [9] "CFAP161"       "ANKRD30BL"     "MARCHF8"       "PDZRN4"       
## [13] "CXXC5"         "LINC00862"     "RP5-864K19.4"  "SGO1-AS1"     
## [17] "RP11-147G16.1" "RP11-154D17.1" "RP11-305L7.3"  "SLC5A4-AS1"   
## [21] "LINC01322"     "RP11-20D14.3"  "FMNL1-DT"      "HCG17"        
## [25] "RP11-154H23.4" "AC003099.2"

## [1] 30
##  [1] "ST6GAL1"  "FRMD4A"   "SFMBT2"   "DENND1A"  "CHSY1"    "DENND4C" 
##  [7] "TLR2"     "SSBP2"    "ARHGAP18" "JAK1"     "RBM47"    "AXL"     
## [13] "TANC2"    "TLR5"

## [1] 31
##  [1] "PLPP1"      "CARMIL1"    "NME3"       "PLGRKT"     "TMEM156"   
##  [6] "MMS22L"     "TMEM144"    "FILIP1L"    "SRGAP3"     "LINC01356" 
## [11] "PTCHD4"     "ST3GAL1"    "PDE8A"      "PILRA"      "RGS1"      
## [16] "GRK3"       "SRGN"       "SEC14L1"    "TMEM65"     "ROBO1"     
## [21] "CYRIA"      "MIR181A1HG" "IFITM10"

## [1] 32
##  [1] "TTC7A"         "HAMP"          "PIGL"          "BTG2"         
##  [5] "LIPC"          "MACROD2"       "TTTY14"        "DLEU7"        
##  [9] "LINC02649"     "AC007879.5"    "RP11-202G18.1" "CCDC26"       
## [13] "RFX3-AS1"      "LINC00884"     "RNF217-AS1"    "RP11-360F5.1" 
## [17] "LINC02762"     "CTC-575N7.1"   "RP11-624C23.1" "RP11-463D19.1"
## [21] "SMIM35"        "RP11-196H14.2" "RP11-323I15.5" "COPG2IT1"

## [1] 33
##  [1] "KIF26B"   "ADAM22"   "PDE4A"    "CELF4"    "MOB3B"    "TGFBI"   
##  [7] "PPFIA2"   "CACNA1B"  "ARID5B"   "CACNA1C"  "CACNA2D1" "FMN2"    
## [13] "SORCS3"   "SYN2"     "GRM5"     "GRIN1"    "CADM1"    "SYN3"    
## [19] "MYT1L"    "DPYD"     "RYR2"

## [1] 34
##  [1] "CALCR"         "MBNL3"         "ADGRG6"        "PID1"         
##  [5] "NMNAT3"        "BEAN1"         "TMC7"          "LINC00476"    
##  [9] "ARSJ"          "SCOC-AS1"      "AC105760.2"    "ZRANB2-AS2"   
## [13] "RP5-1198O20.4" "ITGA9-AS1"     "RP11-634B7.4"  "RORA-AS1"     
## [17] "MIR3142HG"     "LINC00506"     "RP11-622I12.1" "RP11-1H15.4"  
## [21] "RP11-6G22.1"   "MAMLD1"        "SPOCK2"        "TBC1D10C"     
## [25] "LINC01435"     "RP1-179N16.6"  "AGBL1"

## [1] 35
##  [1] "CHD5"          "OLFM1"         "CEMIP2"        "CBLN2"        
##  [5] "CDH18"         "CNTN5"         "NPAS3"         "ME3"          
##  [9] "AK5"           "PALM2AKAP2"    "CLSTN2"        "ABCG1"        
## [13] "CAMK2N1"       "LRFN5"         "NEGR1"         "RESF1"        
## [17] "H1-10"         "LSAMP"         "PTPRT"         "CACNA1E"      
## [21] "TOX"           "FAM155A"       "MEG3"          "RP11-191L9.4" 
## [25] "MIAT"          "SHISA9"        "RP11-17A1.3"   "RP11-259O2.1" 
## [29] "RP11-111A21.1"

## [1] 36
##  [1] "ATP8B1"        "IL7"           "SSPN"          "SHOC1"        
##  [5] "PLD4"          "AC002463.3"    "HNRNPA1P48"    "AC105461.1"   
##  [9] "RP11-142M10.2" "ARHGAP15-AS1"  "RP5-1132H15.1" "LANCL1-AS1"   
## [13] "RP11-575L7.8"  "FBXO30-DT"     "CPB2-AS1"      "RP1-117O3.2"  
## [17] "KDM4A-AS1"     "BCL2L1-AS1"    "RP11-544A12.4" "LRIG2-DT"     
## [21] "RP11-775D22.3" "RHOQ-AS1"      "RP11-506H20.1" "LMO7-AS1"     
## [25] "RP11-66H6.3"   "RP11-126O1.4"  "RP1-256G22.2"  "RP11-261A24.1"
## [29] "RP11-168F24.3" "RP11-13J12.3"

## [1] 37
##  [1] "XKR6"      "SKA2"      "KIAA0825"  "KATNAL1"   "PRKAG2"    "CLIP4"    
##  [7] "MXI1"      "PATJ"      "CLVS2"     "ST18"      "PITPNC1"   "RASA2"    
## [13] "ZFYVE28"   "EDIL3"     "DLGAP1"    "JAKMIP2"   "SAMD12"    "CSMD1"    
## [19] "OPCML"     "RALYL"     "KCNIP4"    "SLIT1"     "KAZN"      "BASP1-AS1"
## [25] "SATB1-AS1"

## [1] 38
##  [1] "ABCB4"         "OMD"           "ACKR4"         "ADAMTS17"     
##  [5] "ABCA8"         "GLT1D1"        "FOLR2"         "MT1E"         
##  [9] "RAB37"         "MCMDC2"        "RP11-131L23.1" "EDNRB-AS1"    
## [13] "SMCR5"         "AC073115.7"    "AC092431.3"    "KIF5C-AS1"    
## [17] "AC073115.6"    "RP5-1101C3.1"  "CTB-161M19.4"  "MTRNR2L8"     
## [21] "RP11-867G2.8"  "PRANCR"        "PSMD7-DT"      "TCF4-AS1"     
## [25] "RP11-121C2.3"  "RP11-413N10.3" "RP11-275G7.2"  "RP11-4F5.3"   
## [29] "GADD45B"

## [1] 39
##  [1] "NEXMIF"        "CRYM"          "LRRC6"         "CALY"         
##  [5] "DUOX1"         "HERC6"         "RNF180"        "DOK6"         
##  [9] "TSBP1-AS1"     "RP13-188A5.1"  "PCDH9-AS2"     "IPO9-AS1"     
## [13] "RAP2C-AS1"     "RP11-123B3.2"  "RP11-120A1.1"  "RP11-541P9.3" 
## [17] "ZNF10"         "RP11-493L12.4" "LINC02316"     "CEROX1"       
## [21] "ZNF528-AS1"    "LINC02666"     "RP11-314L11.1" "RP1-80B9.4"   
## [25] "RP4-545L17.11" "RP11-320L2.1"  "RP11-617F9.2"  "LINC01762"    
## [29] "SAP30-DT"

## [1] 40
##  [1] "ZNF804B"      "SYT7"         "ADGRL1"       "IGSF9B"       "NALCN"       
##  [6] "KCNT1"        "ARFGEF3"      "DNAH6"        "CSMD2"        "ATP8A1"      
## [11] "PCSK2"        "GPNMB"        "ZNF365"       "BTBD11"       "ZNF385D"     
## [16] "SPOCK1"       "FBXO32"       "GABRB1"       "PRICKLE2"     "ARHGEF3"     
## [21] "ADGRV1"       "SRRM3"        "TMTC2"        "GPC5"         "KCTD16"      
## [26] "RORB"         "CTC-340A15.2" "LINC01414"    "CTC-535M15.2"

for(n1 in names(goseq_res)){
  k = as.numeric(gsub("set_", "", n1))
  print(n1)
  print(gene_sets[[k]])
  print(goseq_res[[n1]])

}
## [1] "set_2"
##  [1] "MTERF1"        "PIGC"          "ADAMTS19"      "ADGRG5"       
##  [5] "BEX3"          "FLJ40194"      "TMEM151B"      "PCED1B"       
##  [9] "H2BC4"         "H2BC18"        "COL5A2"        "ENTPD3-AS1"   
## [13] "TEX41"         "ZNF451-AS1"    "RP4-568F9.6"   "RP11-557H15.4"
## [17] "LINC02642"     "BDNF-AS"       "LINC02057"     "AC008697.1"   
## [21] "CTD-2537O9.1"  "RP11-370I10.6" "H2AC19"        "H2BC8"        
## [25] "H2AC8"         "RP11-495L19.1" "RP11-321P16.3" "RP11-399K19.1"
## $reactome
##                                                                               category
## 412                                                            hats acetylate histones
## 415                                                                   hcmv late events
## 416                                                         hdacs deacetylate histones
## 1164                                                  ub specific processing proteases
## 413                                                                  hcmv early events
## 414                                                                     hcmv infection
## 147                                                        chromatin modifying enzymes
## 226                                                                   deubiquitination
## 9    activated pkn1 stimulates transcription of ar androgen receptor regulated genes k
## 76                            assembly of the orc complex at the origin of replication
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 412             2.965695e-08                        1          5        9
## 415             5.778771e-08                        1          5       11
## 416             6.316395e-08                        1          5       11
## 1164            3.170673e-07                        1          5       16
## 413             3.274704e-07                        1          5       14
## 414             4.014105e-07                        1          5       15
## 147             1.143854e-06                        1          5       20
## 226             1.512997e-06                        1          5       24
## 9               1.930756e-06                        1          4        8
## 76              1.930756e-06                        1          4        8
##               FDR
## 412  2.488660e-05
## 415  2.488660e-05
## 416  2.488660e-05
## 1164 7.741401e-05
## 413  7.741401e-05
## 414  7.907787e-05
## 147  1.311299e-04
## 226  1.311299e-04
## 9    1.311299e-04
## 76   1.311299e-04
## 
## [1] "set_7"
##  [1] "NAV3"        "LY96"        "FGGY"        "ZDHHC14"     "KCNQ3"      
##  [6] "MIR99AHG"    "MEF2C-AS1"   "LINC02232"   "ATP6V0D1-DT" "KCNQ2"      
## [11] "CUX2"        "FRMD4B"      "WDR45B"      "SLC2A5"      "HSPA12A"    
## [16] "RBFOX3"      "MYO1D"       "KCNQ5"       "ZNF846"      "ZNF407"     
## [21] "LNCAROD"    
## $reactome
##                              category over_represented_pvalue
## 1177 voltage gated potassium channels            1.791959e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 1177                0.9999999          3        7 0.02118095
## 
## [1] "set_28"
##  [1] "RPL3"       "RPL18A"     "RPL19"      "RPL34"      "RPL24"     
##  [6] "LDAH"       "RPS24"      "RPS2"       "RPS11"      "RPL11"     
## [11] "RPS27A"     "RPL32"      "RAB28"      "RPL26"      "RPL38"     
## [16] "RPLP2"      "RPS23"      "RPL12"      "RPL23A"     "ZNF433-AS1"
## [21] "ADARB1"    
## $reactome
##                                              category over_represented_pvalue
## 138                   cellular response to starvation                       0
## 301                 eukaryotic translation elongation                       0
## 302                 eukaryotic translation initiation                       0
## 452                               influenza infection                       0
## 557         metabolism of amino acids and derivatives                       0
## 640                       nonsense mediated decay nmd                       0
## 801       regulation of expression of slits and robos                       0
## 850 response of eif2ak4 gcn2 to amino acid deficiency                       0
## 901                                   rrna processing                       0
## 927                       selenoamino acid metabolism                       0
##     under_represented_pvalue numDEInCat numInCat FDR
## 138                        1         17       26   0
## 301                        1         17       23   0
## 302                        1         17       23   0
## 452                        1         17       27   0
## 557                        1         17       41   0
## 640                        1         17       23   0
## 801                        1         17       27   0
## 850                        1         17       25   0
## 901                        1         17       25   0
## 927                        1         17       26   0
## 
## $go_bp
##                                     category over_represented_pvalue
## 630                  cytoplasmic translation            0.000000e+00
## 4283                     ribosome biogenesis            0.000000e+00
## 4279      ribosomal large subunit biogenesis            5.300278e-10
## 4278        ribosomal large subunit assembly            4.261838e-08
## 4281      ribosomal small subunit biogenesis            2.070857e-07
## 4282                       ribosome assembly            2.095572e-07
## 3144        protein rna complex organization            7.898271e-06
## 2221 non membrane bounded organelle assembly            5.073840e-05
##      under_represented_pvalue numDEInCat numInCat          FDR
## 630                 1.0000000         17       23 0.000000e+00
## 4283                1.0000000          8       13 0.000000e+00
## 4279                1.0000000          5        5 8.339104e-07
## 4278                1.0000000          4        4 5.028969e-05
## 4281                1.0000000          4        5 1.648517e-04
## 4282                1.0000000          4        5 1.648517e-04
## 3144                0.9999999          4       10 5.325691e-03
## 2221                0.9999976          5       30 2.993565e-02
## 
## [1] "set_33"
##  [1] "KIF26B"   "ADAM22"   "PDE4A"    "CELF4"    "MOB3B"    "TGFBI"   
##  [7] "PPFIA2"   "CACNA1B"  "ARID5B"   "CACNA1C"  "CACNA2D1" "FMN2"    
## [13] "SORCS3"   "SYN2"     "GRM5"     "GRIN1"    "CADM1"    "SYN3"    
## [19] "MYT1L"    "DPYD"     "RYR2"    
## $go_bp
##                               category over_represented_pvalue
## 280 calcium ion transport into cytosol            8.454614e-07
##     under_represented_pvalue numDEInCat numInCat         FDR
## 280                        1          4        5 0.003990578
saveRDS(goseq_res, sprintf("output/gene_set_enrichments_%s.RDS", 
                           file_tag))

Session information

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  8955014 478.3   16112782 860.6         NA 16112782 860.6
## Vcells 16664170 127.2   31295066 238.8      65536 31295066 238.8
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] goseq_1.50.0           geneLenDataBase_1.34.0 BiasedUrn_2.0.10      
##  [4] fgsea_1.24.0           biomaRt_2.54.1         limma_3.54.2          
##  [7] tidyr_1.3.0            ggpubr_0.6.0           ggplot2_3.4.2         
## [10] data.table_1.14.8     
## 
## loaded via a namespace (and not attached):
##   [1] nlme_3.1-162                matrixStats_1.0.0          
##   [3] bitops_1.0-7                bit64_4.0.5                
##   [5] filelock_1.0.2              progress_1.2.2             
##   [7] httr_1.4.6                  GenomeInfoDb_1.34.9        
##   [9] tools_4.2.3                 backports_1.4.1            
##  [11] bslib_0.4.2                 utf8_1.2.3                 
##  [13] R6_2.5.1                    mgcv_1.8-42                
##  [15] DBI_1.1.3                   BiocGenerics_0.44.0        
##  [17] colorspace_2.1-0            withr_2.5.0                
##  [19] tidyselect_1.2.0            prettyunits_1.1.1          
##  [21] bit_4.0.5                   curl_5.0.1                 
##  [23] compiler_4.2.3              cli_3.6.1                  
##  [25] Biobase_2.58.0              xml2_1.3.4                 
##  [27] DelayedArray_0.24.0         rtracklayer_1.58.0         
##  [29] sass_0.4.5                  scales_1.2.1               
##  [31] rappdirs_0.3.3              Rsamtools_2.14.0           
##  [33] stringr_1.5.0               digest_0.6.31              
##  [35] rmarkdown_2.21              XVector_0.38.0             
##  [37] pkgconfig_2.0.3             htmltools_0.5.5            
##  [39] MatrixGenerics_1.10.0       dbplyr_2.3.2               
##  [41] fastmap_1.1.1               rlang_1.1.0                
##  [43] rstudioapi_0.14             RSQLite_2.3.1              
##  [45] BiocIO_1.8.0                jquerylib_0.1.4            
##  [47] generics_0.1.3              jsonlite_1.8.4             
##  [49] BiocParallel_1.32.6         dplyr_1.1.2                
##  [51] car_3.1-2                   RCurl_1.98-1.12            
##  [53] magrittr_2.0.3              GO.db_3.16.0               
##  [55] GenomeInfoDbData_1.2.9      Matrix_1.6-4               
##  [57] Rcpp_1.0.10                 munsell_0.5.0              
##  [59] S4Vectors_0.36.2            fansi_1.0.4                
##  [61] abind_1.4-5                 lifecycle_1.0.3            
##  [63] stringi_1.7.12              yaml_2.3.7                 
##  [65] carData_3.0-5               SummarizedExperiment_1.28.0
##  [67] zlibbioc_1.44.0             org.Hs.eg.db_3.16.0        
##  [69] BiocFileCache_2.6.1         grid_4.2.3                 
##  [71] blob_1.2.4                  parallel_4.2.3             
##  [73] crayon_1.5.2                lattice_0.20-45            
##  [75] splines_4.2.3               Biostrings_2.66.0          
##  [77] cowplot_1.1.1               GenomicFeatures_1.50.4     
##  [79] hms_1.1.3                   KEGGREST_1.38.0            
##  [81] knitr_1.44                  pillar_1.9.0               
##  [83] GenomicRanges_1.50.2        rjson_0.2.21               
##  [85] ggsignif_0.6.4              codetools_0.2-19           
##  [87] stats4_4.2.3                fastmatch_1.1-3            
##  [89] XML_3.99-0.14               glue_1.6.2                 
##  [91] evaluate_0.20               png_0.1-8                  
##  [93] vctrs_0.6.2                 gtable_0.3.3               
##  [95] purrr_1.0.1                 cachem_1.0.7               
##  [97] xfun_0.39                   broom_1.0.4                
##  [99] restfulr_0.0.15             rstatix_0.7.2              
## [101] tibble_3.2.1                GenomicAlignments_1.34.1   
## [103] AnnotationDbi_1.60.2        memoise_2.0.1              
## [105] IRanges_2.32.0